# Install the non-CRAN dependencies only when they are missing, so that
# sourcing this script does not hit the network and rebuild packages on
# every run.
# NOTE(review): the original force-reinstalled ratplots from GitHub each time;
# if the newest GitHub build is required, reinstall it manually before running.
for (bioc_pkg in c("fgsea", "limma")) {
  if (!requireNamespace(bioc_pkg, quietly = TRUE)) {
    BiocManager::install(bioc_pkg)
  }
}
if (!requireNamespace("ratplots", quietly = TRUE)) {
  devtools::install_github("jevanveen/ratplots")
}

#load required packages 
library(tidyverse)
library(Seurat)
library(RColorBrewer)
library(fgsea)
library(cowplot)
library(ggrepel)
library(Hmisc)
library(ratplots)

# Print the installed versions of fgsea and ratplots so the run records which
# builds of the analysis packages were used.
packageVersion("fgsea")
packageVersion("ratplots")

#import and normalize data ####

# Wild-type vehicle replicates (Output/S1, S10, S5). Each sample is read with
# Read10X(), wrapped in a Seurat object, annotated with the percentage of
# counts coming from "^mt-" features, and normalized before the next one.
WTV1.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S1")
WTV1 <- CreateSeuratObject(WTV1.data, project = "wtv1")
WTV1 <- PercentageFeatureSet(WTV1, pattern = "^mt-", col.name = "percent.mt")
WTV1 <- NormalizeData(WTV1)

WTV2.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S10")
WTV2 <- CreateSeuratObject(WTV2.data, project = "wtv2")
WTV2 <- PercentageFeatureSet(WTV2, pattern = "^mt-", col.name = "percent.mt")
WTV2 <- NormalizeData(WTV2)

WTV3.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S5")
WTV3 <- CreateSeuratObject(WTV3.data, project = "wtv3")
WTV3 <- PercentageFeatureSet(WTV3, pattern = "^mt-", col.name = "percent.mt")
WTV3 <- NormalizeData(WTV3)


# Wild-type tamoxifen replicates (Output/S3, S8, S9, S13, S16): read, wrap in
# a Seurat object and normalize, one sample at a time.
WTT1.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S3")
WTT1 <- CreateSeuratObject(WTT1.data, project = "wtt1")
WTT1 <- NormalizeData(WTT1)

WTT2.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S8")
WTT2 <- CreateSeuratObject(WTT2.data, project = "wtt2")
WTT2 <- NormalizeData(WTT2)

WTT3.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S9")
WTT3 <- CreateSeuratObject(WTT3.data, project = "wtt3")
WTT3 <- NormalizeData(WTT3)

WTT4.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S13")
WTT4 <- CreateSeuratObject(WTT4.data, project = "wtt4")
WTT4 <- NormalizeData(WTT4)

WTT5.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S16")
WTT5 <- CreateSeuratObject(WTT5.data, project = "wtt5")
WTT5 <- NormalizeData(WTT5)


# Mutant vehicle replicates (Output/S4, S7, S12, S14): read, wrap in a Seurat
# object and normalize, one sample at a time.
MUTV1.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S4")
MUTV1 <- CreateSeuratObject(MUTV1.data, project = "mutv1")
MUTV1 <- NormalizeData(MUTV1)

MUTV2.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S7")
MUTV2 <- CreateSeuratObject(MUTV2.data, project = "mutv2")
MUTV2 <- NormalizeData(MUTV2)

MUTV3.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S12")
MUTV3 <- CreateSeuratObject(MUTV3.data, project = "mutv3")
MUTV3 <- NormalizeData(MUTV3)

MUTV4.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S14")
MUTV4 <- CreateSeuratObject(MUTV4.data, project = "mutv4")
MUTV4 <- NormalizeData(MUTV4)





# Mutant tamoxifen replicates (Output/S2, S11, S6, S15): read, wrap in a Seurat
# object and normalize, one sample at a time.
MUTT1.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S2")
MUTT1 <- CreateSeuratObject(MUTT1.data, project = "mutt1")
MUTT1 <- NormalizeData(MUTT1)

MUTT2.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S11")
MUTT2 <- CreateSeuratObject(MUTT2.data, project = "mutt2")
MUTT2 <- NormalizeData(MUTT2)

MUTT3.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S6")
MUTT3 <- CreateSeuratObject(MUTT3.data, project = "mutt3")
MUTT3 <- NormalizeData(MUTT3)

MUTT4.data <- Read10X(data.dir = "~/Dropbox/JEV/Zhi single cell paper/CorreaDropSeq-selected/Output/S15")
MUTT4 <- CreateSeuratObject(MUTT4.data, project = "mutt4")
MUTT4 <- NormalizeData(MUTT4)

#merge datasets####

# Combine the replicates of each condition into a single Seurat object. Cell
# names are prefixed with the replicate id and the normalized data are carried
# over (merge.data = TRUE).
WTV <- merge(
  WTV1,
  y = c(WTV2, WTV3),
  add.cell.ids = c("wtv1", "wtv2", "wtv3"),
  project = "wtv",
  merge.data = TRUE
)

WTT <- merge(
  WTT1,
  y = c(WTT2, WTT3, WTT4, WTT5),
  add.cell.ids = c("wtt1", "wtt2", "wtt3", "wtt4", "wtt5"),
  project = "wtt",
  merge.data = TRUE
)

table(WTT@meta.data[["orig.ident"]])

MUTV <- merge(
  MUTV1,
  y = c(MUTV2, MUTV3, MUTV4),
  add.cell.ids = c("mutv1", "mutv2", "mutv3", "mutv4"),
  project = "mutv",
  merge.data = TRUE
)

MUTT <- merge(
  MUTT1,
  y = c(MUTT2, MUTT3, MUTT4),
  add.cell.ids = c("mutt1", "mutt2", "mutt3", "mutt4"),
  project = "mutt",
  merge.data = TRUE
)

table(MUTV@meta.data[["orig.ident"]])
table(MUTT@meta.data[["orig.ident"]])

# Drop the per-replicate objects and raw matrices now that they are merged.
rm(
  WTV1, WTV2, WTV3,
  WTT1, WTT2, WTT3, WTT4, WTT5,
  MUTV1, MUTV2, MUTV3, MUTV4,
  MUTT1, MUTT2, MUTT3, MUTT4,
  WTV1.data, WTV2.data, WTV3.data,
  WTT1.data, WTT2.data, WTT3.data, WTT4.data, WTT5.data,
  MUTV1.data, MUTV2.data, MUTV3.data, MUTV4.data,
  MUTT1.data, MUTT2.data, MUTT3.data, MUTT4.data
)

# Store the replicate of origin as a factor in every merged object.
WTV@meta.data[["orig.ident"]] <- as.factor(WTV@meta.data[["orig.ident"]])
WTT@meta.data[["orig.ident"]] <- as.factor(WTT@meta.data[["orig.ident"]])
MUTV@meta.data[["orig.ident"]] <- as.factor(MUTV@meta.data[["orig.ident"]])
MUTT@meta.data[["orig.ident"]] <- as.factor(MUTT@meta.data[["orig.ident"]])

# Cells per replicate in each merged object.
table(WTV@meta.data[["orig.ident"]])
table(WTT@meta.data[["orig.ident"]])
table(MUTV@meta.data[["orig.ident"]])
table(MUTT@meta.data[["orig.ident"]])


#quality filtering and preprocessing####

# (Re)compute the percentage of counts from "^mt-" features on the merged objects.
WTV <- PercentageFeatureSet(WTV, pattern = "^mt-", col.name = "percent.mt")
WTT <- PercentageFeatureSet(WTT, pattern = "^mt-", col.name = "percent.mt")
MUTV <- PercentageFeatureSet(MUTV, pattern = "^mt-", col.name = "percent.mt")
MUTT <- PercentageFeatureSet(MUTT, pattern = "^mt-", col.name = "percent.mt")

# NOTE(review): the violin plots below are combined with `+ ylim()`, so they
# appear to be ggplot-based; this base-graphics layout setting probably has no
# effect on them - confirm before relying on it.
par(mfrow = c(1, 2))

# QC metric distributions for each condition.
qc.features <- c("nFeature_RNA", "nCount_RNA", "percent.mt")

VlnPlot(WTV, features = qc.features, pt.size = 0)

VlnPlot(WTT, features = qc.features, pt.size = 0)

VlnPlot(MUTT, features = qc.features, pt.size = 0)

VlnPlot(MUTV, features = qc.features, pt.size = 0)

# Zoom in on the low end of the detected-feature distribution.
VlnPlot(WTV, features = c("nFeature_RNA"), pt.size = 0) + ylim(0, 1000)

# Keep cells with more than 200 and fewer than 4000 detected features and
# less than 15% mitochondrial counts.
WTV <- subset(WTV, subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 15)
WTT <- subset(WTT, subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 15)
MUTV <- subset(MUTV, subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 15)
MUTT <- subset(MUTT, subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 15)

#how many cells pass filtering?
table(WTV@meta.data[["orig.ident"]])
table(MUTV@meta.data[["orig.ident"]])
table(WTT@meta.data[["orig.ident"]])
table(MUTT@meta.data[["orig.ident"]])

#find variable features and add treatment group metadata####

# For every condition: pick the 2000 most variable features (vst), then label
# all of its cells with treatment ("group"), genotype x treatment ("cond") and
# genotype ("geno").
WTV <- FindVariableFeatures(WTV, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
WTV <- AddMetaData(WTV, metadata = "Vehicle", col.name = "group")
WTV <- AddMetaData(WTV, metadata = "WtVehicle", col.name = "cond")
WTV <- AddMetaData(WTV, metadata = "Wild-Type", col.name = "geno")

WTT <- FindVariableFeatures(WTT, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
WTT <- AddMetaData(WTT, metadata = "Tamoxifen", col.name = "group")
WTT <- AddMetaData(WTT, metadata = "WtTamoxifen", col.name = "cond")
WTT <- AddMetaData(WTT, metadata = "Wild-Type", col.name = "geno")

MUTV <- FindVariableFeatures(MUTV, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
MUTV <- AddMetaData(MUTV, metadata = "Vehicle", col.name = "group")
MUTV <- AddMetaData(MUTV, metadata = "MutVehicle", col.name = "cond")
MUTV <- AddMetaData(MUTV, metadata = "Mutant", col.name = "geno")

MUTT <- FindVariableFeatures(MUTT, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
MUTT <- AddMetaData(MUTT, metadata = "Tamoxifen", col.name = "group")
MUTT <- AddMetaData(MUTT, metadata = "MutTamoxifen", col.name = "cond")
MUTT <- AddMetaData(MUTT, metadata = "Mutant", col.name = "geno")


#all cells integration ####

# The four conditions, in the order WT vehicle, WT tamoxifen, mutant vehicle,
# mutant tamoxifen.
all.cells.list <- list(WTV, WTT, MUTV, MUTT)

# Allow up to 3500 MiB of globals to be exported to future workers.
options(future.globals.maxSize = 3500 * 1024^2)

all.cells.anchors <- FindIntegrationAnchors(object.list = all.cells.list, dims = 1:30)

all.cells.integrated <- IntegrateData(anchorset = all.cells.anchors, dims = 1:30)

# Checkpoint the integrated object.
saveRDS(all.cells.integrated, file = "~/Dropbox/JEV/R/Tamoxifen2/allcellsv20.rds")

#clean up workspace
rm(all.cells.list, all.cells.anchors, WTV, WTT, MUTV, MUTT)

# Work on the integrated assay and show how the cells split by replicate,
# genotype and condition.
DefaultAssay(all.cells.integrated) <- "integrated"
DefaultAssay(all.cells.integrated)
table(all.cells.integrated@meta.data[["orig.ident"]])
table(all.cells.integrated@meta.data[["geno"]])
table(all.cells.integrated@meta.data[["cond"]])

#scale data and perform clustering####

# Scale, reduce to 30 principal components and inspect the elbow plot.
all.cells.integrated <- ScaleData(all.cells.integrated, verbose = FALSE)
all.cells.integrated <- RunPCA(object = all.cells.integrated, npcs = 30, verbose = FALSE)
ElbowPlot(all.cells.integrated)

# Neighbour graph, clustering and both embeddings all use the first 20 PCs.
all.cells.integrated <- FindNeighbors(object = all.cells.integrated, dims = 1:20)
all.cells.integrated <- FindClusters(object = all.cells.integrated, resolution = 0.05)
all.cells.integrated <- RunUMAP(object = all.cells.integrated, reduction = "pca", dims = 1:20)
all.cells.integrated <- RunTSNE(object = all.cells.integrated, reduction = "pca", dims = 1:20)

#UMAP and TSNE plots ####

DimPlot(all.cells.integrated, reduction = "umap", pt.size = 0.01, label = FALSE)

DimPlot(all.cells.integrated, reduction = "tsne", pt.size = 0.01, label = FALSE)


#export all.cells.integrated for shiny server
saveRDS(all.cells.integrated, file = "~/Dropbox/JEV/R/tamoxifenShiny/allcells.rds")

# Helper: names of down-sampled wild-type cells, vehicle cells first and then
# tamoxifen cells; `n` is handed to WhichCells() as its `downsample` limit.
sample_wt_cells <- function(n) {
  wt.vehicle <- subset(x = all.cells.integrated, subset = cond == "WtVehicle")
  wt.tamoxifen <- subset(x = all.cells.integrated, subset = cond == "WtTamoxifen")
  c(
    WhichCells(object = wt.vehicle, downsample = n),
    WhichCells(object = wt.tamoxifen, downsample = n)
  )
}

# Side-by-side overview of both embeddings.
p1 <- DimPlot(all.cells.integrated, reduction = "umap", pt.size = 0.01, label = FALSE)
p2 <- DimPlot(all.cells.integrated, reduction = "tsne", pt.size = 0.01, label = FALSE)

plot_grid(p1, p2)

# Quick look at WT vehicle vs WT tamoxifen on the UMAP.
DimPlot(
  all.cells.integrated,
  reduction = "umap", label = FALSE, group.by = "cond", pt.size = 0.01,
  cells = sample_wt_cells(100)
)

#exported UMAPs
p1 <- DimPlot(
  all.cells.integrated,
  reduction = "umap", label = FALSE, group.by = "cond", pt.size = 0.01,
  cells = sample_wt_cells(250)
)
p5 <- DimPlot(
  all.cells.integrated,
  reduction = "umap", label = FALSE, pt.size = 0.01,
  cells = sample_wt_cells(1000)
)
p6 <- DimPlot(
  all.cells.integrated,
  reduction = "umap", label = FALSE, group.by = "cond", pt.size = 0.01,
  cells = WhichCells(object = all.cells.integrated, downsample = 2000)
)

save_ploty("~/Box/Tamoxifen paper/v20umapwtvtmx.png", plot = p1, base_asp = 1.5, base_height = 3)
save_ploty("~/Box/Tamoxifen paper/v20umapWTcells.png", plot = p5, base_asp = 1.5, base_height = 3)
save_ploty("~/Box/Tamoxifen paper/v20umap4groups.png", plot = p6, base_asp = 1.4, base_height = 3.5)

#exported tSNEs
p1 <- DimPlot(all.cells.integrated, reduction = "tsne", label = TRUE, pt.size = 0.01) + NoLegend()
p2 <- DimPlot(
  all.cells.integrated,
  reduction = "tsne", label = FALSE, group.by = "cond", pt.size = 0.01,
  cells = sample_wt_cells(250)
) + NoLegend()

plot_grid(p1, p2)

save_ploty("~/Box/Tamoxifen paper/v20tsneclusters.png", plot = p1, base_asp = 1, base_height = 3)
save_ploty("~/Box/Tamoxifen paper/v20tsnewtvtmx.png", plot = p2, base_asp = 1, base_height = 3)



#test esr1 / nkx coexpression
# Switch to the RNA assay, plot Nkx2-1 against Esr1 for all cells, then repeat
# for the cells in which both genes are above zero and count those cells.
DefaultAssay(all.cells.integrated) <- "RNA"
FeatureScatter(all.cells.integrated, feature1 = "Nkx2-1", feature2 = "Esr1")

temp <- subset(all.cells.integrated, subset = Esr1 > 0 & `Nkx2-1` > 0)
FeatureScatter(temp, feature1 = "Nkx2-1", feature2 = "Esr1")

sum(table(temp@active.ident))


#cell type marker identification and renaming clusters####
# Markers of every cluster, then the top 10 per cluster via ratplots::top_markers().
all.cells.markers <- FindAllMarkers(
  all.cells.integrated,
  logfc.threshold = 1,
  min.pct = 0.1
)
all.cells.markers.top <- top_markers(all.cells.markers, n_markers = 10)

#test esr1 enrichment. This should not include KOs.
# Only wild-type cells are tested; thresholds are zeroed so every listed
# receptor gene is reported for the clusters where it is enriched.
esr.marker.test <- FindAllMarkers(
  subset(x = all.cells.integrated, subset = geno == "Wild-Type"),
  features = c("Esr1", "Esr2", "Gper1", "Nkx2-1", "Pgr"),
  min.pct = 0,
  logfc.threshold = 0,
  only.pos = TRUE
)


#Top cluster marker violins
DefaultAssay(all.cells.integrated) <- "RNA"
DefaultAssay(all.cells.integrated)

# Helper: legend-free violin plot of one marker gene across all clusters.
marker_violin <- function(gene) {
  VlnPlot(object = all.cells.integrated, features = c(gene), ncol = 1, pt.size = 0) +
    NoLegend()
}

c1 <- marker_violin("Itih3")
c2 <- marker_violin("Plp1")
c3 <- marker_violin("Meg3")
c4 <- marker_violin("Flt1")
c5 <- marker_violin("Ctss")
c6 <- marker_violin("Pdgfra")
c7 <- marker_violin("Tmem212")
c8 <- marker_violin("Acta2")
c9 <- marker_violin("Dcn")

# One wide, short strip per marker.
save_ploty("~/Box/Tamoxifen paper/v20cluster_itih3.png", plot = c1,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)
save_ploty("~/Box/Tamoxifen paper/v20cluster_plp1.png", plot = c2,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)
save_ploty("~/Box/Tamoxifen paper/v20cluster_meg3.png", plot = c3,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)
save_ploty("~/Box/Tamoxifen paper/v20cluster_flt1.png", plot = c4,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)
save_ploty("~/Box/Tamoxifen paper/v20cluster_ctss.png", plot = c5,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)
save_ploty("~/Box/Tamoxifen paper/v20cluster_pdgfra.png", plot = c6,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)
save_ploty("~/Box/Tamoxifen paper/v20cluster_tmem212.png", plot = c7,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)
save_ploty("~/Box/Tamoxifen paper/v20cluster_acta2.png", plot = c8,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)
save_ploty("~/Box/Tamoxifen paper/v20cluster_dcn.png", plot = c9,
           base_asp = 5, base_height = 1, strip_text = TRUE, line_width = 1)

#clean workspace
rm(c1, c2, c3, c4, c5, c6, c7, c8, c9, marker_violin)

#rename clusters
# Map the current identity levels, in order, onto cell-type labels.
new.cluster.ids <- setNames(
  c("Astro", "Oligo", "Neuron", "Endo", "Microglia", "Polydendro", "Ependymal", "Mural", "Fibro"),
  levels(all.cells.integrated)
)
all.cells.integrated <- RenameIdents(all.cells.integrated, new.cluster.ids)
DimPlot(all.cells.integrated, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

# Dot plots of receptor expression in wild-type vehicle cells only.
dp <- DotPlot(
  object = subset(x = all.cells.integrated, subset = cond == "WtVehicle"),
  features = c("Esr1", "Esr2", "Pgr", "Gper1")
) +
  theme(axis.text.x = element_text(face = "italic")) +
  RotatedAxis()

# Same cells, Nkx2-1 and Esr1, with the cluster axis in reversed level order.
dpko <- DotPlot(
  object = subset(x = all.cells.integrated, subset = cond == "WtVehicle"),
  features = c("Nkx2-1", "Esr1")
) +
  theme(axis.text.x = element_text(face = "italic")) +
  RotatedAxis() +
  scale_y_discrete(limits = rev(levels(all.cells.integrated@active.ident)))

save_ploty("~/Box/Tamoxifen paper/v20esrDotPlot.tiff", plot = dp, base_height = 2.8, base_asp = 1.25)
save_ploty("~/Box/Tamoxifen paper/v20esrDotPlotNkx2.tiff", plot = dpko, base_height = 2.25, base_asp = 1.5)

# Cells per renamed cluster.
table(all.cells.integrated@active.ident)

#find all cells cluster DEGs ####

# Per-cluster differential expression (ratplots::cluster_DEGs) on the RNA
# assay for three contrasts, each condition_1 vs condition_2 within `cond`:
#   WT tamoxifen vs WT vehicle, mutant tamoxifen vs mutant vehicle,
#   and mutant vehicle vs WT vehicle.
DefaultAssay(all.cells.integrated) <- "RNA"
wt.cells.tmx.response <- cluster_DEGs(all.cells.integrated,
                                      condition_1 = "WtTamoxifen", condition_2 = "WtVehicle",
                                      meta_slot = "cond", logfc.threshold = .01, min.pct = .001)
mut.cells.tmx.response <- cluster_DEGs(all.cells.integrated,
                                       condition_1 = "MutTamoxifen", condition_2 = "MutVehicle",
                                       meta_slot = "cond", logfc.threshold = .01, min.pct = .001)
wt.cells.mut.response <- cluster_DEGs(all.cells.integrated,
                                      condition_1 = "MutVehicle", condition_2 = "WtVehicle",
                                      meta_slot = "cond", logfc.threshold = .01, min.pct = .001)

DEGs_UpDown_Plot(wt.cells.tmx.response)

# Total DEG count per cluster. The pipe after the first call was missing in
# the original, which left group_by(Cluster) as a stand-alone statement with
# no data argument.
DEGs_UpDown_Plot(wt.cells.tmx.response, return_table = TRUE) %>%
  group_by(Cluster) %>%
  summarise(total = sum(value))


DEGs_UpDown_Plot(mut.cells.tmx.response, return_table = TRUE)
  
  

#plot sig DEGs vs cluster size####

#' Scatter plot of per-cluster DEG totals against DEGs per 1,000 cells.
#'
#' @param object Object whose `active.ident` slot holds the cluster assignment
#'   of every cell (a Seurat object in this script).
#' @param Cluster_DEG_list Per-cluster DEG results, passed to
#'   `DEGs_UpDown_Plot(..., return_table = TRUE)`; the returned table is
#'   expected to have `Cluster` and `value` columns.
#' @return A ggplot with one point per cluster: x = total DEGs,
#'   y = DEGs per 1,000 cells, coloured by cluster.
DEGs_vs_Size <- function(object, Cluster_DEG_list) {
  # Total DEG count per cluster.
  deg_totals <- DEGs_UpDown_Plot(Cluster_DEG_list, return_table = TRUE) %>%
    group_by(Cluster) %>%
    summarise(total = sum(value)) %>%
    ungroup() %>%
    mutate(Cluster = as.character(Cluster))

  # Number of cells per cluster (columns Var1 and Freq).
  cluster_sizes <- as.data.frame(table(object@active.ident))
  cluster_levels <- levels(cluster_sizes$Var1)

  # Match sizes and DEG totals by cluster NAME. The previous positional
  # bind_cols() silently paired the wrong rows whenever the two tables were
  # ordered differently, and failed when a cluster had no DEG rows.
  # NOTE(review): assumes the `Cluster` labels equal the identity names.
  plot_data <- cluster_sizes %>%
    mutate(Var1 = as.character(Var1)) %>%
    left_join(deg_totals, by = c("Var1" = "Cluster")) %>%
    mutate(
      Var1 = factor(Var1, levels = cluster_levels),
      DEGsPer1K = (total / Freq) * 1000
    )

  ggplot(data = plot_data, aes(x = total, y = DEGsPer1K, color = Var1)) +
    geom_point() +
    theme_classic()
}

# Build the DEG-total vs. DEGs-per-1K-cells scatter for the wild-type
# tamoxifen response and write it to disk.
p1 <- DEGs_vs_Size(all.cells.integrated, wt.cells.tmx.response)

save_ploty("~/Box/Tamoxifen paper/v20_DEGsVSclustersize.png", plot = p1, base_asp = 1.75, base_height = 2.5)




#gseas for all cells####

#first load gsea pathways

hallmarks <- gmtPathways("~/Dropbox/JEV/GSEA/h.all.v6.2.symbols.gmt.txt")
gobioprocess <- gmtPathways("~/Dropbox/JEV/GSEA/c5.bp.v6.2.symbols.gmt.txt")

# `cgp` is used below but was never loaded in this script, so a fresh session
# stopped here with "object 'cgp' not found". Load it unless it already exists.
# NOTE(review): this file name is inferred from the naming of the two files
# above (MSigDB C2 "chemical and genetic perturbations", v6.2) - confirm the
# actual path.
if (!exists("cgp")) {
  cgp <- gmtPathways("~/Dropbox/JEV/GSEA/c2.cgp.v6.2.symbols.gmt.txt")
}

# Estrogen-related gene sets collected from the three collections.
e2response <- list()
e2response[["GO_RESPONSE_TO_ESTROGEN"]] <- gobioprocess$GO_RESPONSE_TO_ESTROGEN
e2response[["GO_RESPONSE_TO_ESTRADIOL"]] <- gobioprocess$GO_RESPONSE_TO_ESTRADIOL
e2response[["GO_RESPONSE_TO_STEROID_HORMONE"]] <- gobioprocess$GO_RESPONSE_TO_STEROID_HORMONE
e2response[["STEIN_ESR1_TARGETS"]] <- cgp$STEIN_ESR1_TARGETS
e2response[["FRASOR_RESPONSE_TO_ESTRADIOL_UP"]] <- cgp$FRASOR_RESPONSE_TO_ESTRADIOL_UP
e2response[["FRASOR_RESPONSE_TO_ESTRADIOL_DN"]] <- cgp$FRASOR_RESPONSE_TO_ESTRADIOL_DN
e2response[["HALLMARK_ESTROGEN_RESPONSE_EARLY"]] <- hallmarks$HALLMARK_ESTROGEN_RESPONSE_EARLY
e2response[["HALLMARK_ESTROGEN_RESPONSE_LATE"]] <- hallmarks$HALLMARK_ESTROGEN_RESPONSE_LATE


#here define the list of hallmarks plus neuro pathways for most testing
neuroplus <- hallmarks

# Early and late estrogen response are merged into one combined set, which is
# appended after the hallmark sets.
neuroplus[["HALLMARK_ESTROGEN_RESPONSE"]] <- union(
  hallmarks[["HALLMARK_ESTROGEN_RESPONSE_EARLY"]],
  hallmarks[["HALLMARK_ESTROGEN_RESPONSE_LATE"]]
)

# Hallmark sets left out of the tested collection.
excluded.hallmarks <- c(
  "HALLMARK_MITOTIC_SPINDLE",
  "HALLMARK_DNA_REPAIR",
  "HALLMARK_PANCREAS_BETA_CELLS",
  "HALLMARK_G2M_CHECKPOINT",
  "HALLMARK_ADIPOGENESIS",
  "HALLMARK_MYOGENESIS",
  "HALLMARK_ESTROGEN_RESPONSE_EARLY",
  "HALLMARK_ESTROGEN_RESPONSE_LATE",
  "HALLMARK_APICAL_JUNCTION",
  "HALLMARK_APICAL_SURFACE",
  "HALLMARK_COMPLEMENT",
  "HALLMARK_UNFOLDED_PROTEIN_RESPONSE",
  "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION",
  "HALLMARK_XENOBIOTIC_METABOLISM",
  "HALLMARK_UV_RESPONSE_UP",
  "HALLMARK_COAGULATION",
  "HALLMARK_UV_RESPONSE_DN",
  "HALLMARK_BILE_ACID_METABOLISM",
  "HALLMARK_SPERMATOGENESIS",
  "HALLMARK_ALLOGRAFT_REJECTION",
  "HALLMARK_HEME_METABOLISM"
)
for (set.name in excluded.hallmarks) {
  neuroplus[[set.name]] <- NULL
}

# GO biological-process sets added on top, in this order.
added.go.sets <- c(
  "GO_NEUROPEPTIDE_SIGNALING_PATHWAY",
  "GO_NEURON_APOPTOTIC_PROCESS",
  "GO_NEURON_DEVELOPMENT",
  "GO_NEUROTRANSMITTER_TRANSPORT",
  "GO_REGULATION_OF_NEURON_DIFFERENTIATION",
  "GO_ASTROCYTE_DIFFERENTIATION",
  "GO_OLIGODENDROCYTE_DIFFERENTIATION",
  "GO_ESTABLISHMENT_OF_ENDOTHELIAL_BARRIER",
  "GO_ENDOTHELIAL_CELL_PROLIFERATION",
  "GO_FIBROBLAST_PROLIFERATION"
)
for (set.name in added.go.sets) {
  neuroplus[[set.name]] <- gobioprocess[[set.name]]
}

# Number of gene sets in the final collection, and a record of their names.
length(neuroplus)

write.csv(names(neuroplus), "~/Box/Tamoxifen paper/neuroplusgenesets.csv")



#now run GSEAs

# Estrogen-response GSEA per cluster. In the original the closing parenthesis
# of list() was misplaced, so `sig.snapshot = F` became a second "gene set"
# inside `geneset` instead of an argument of Cluster_GSEA() (compare the
# neuroplus calls below). Single-bracket indexing keeps the gene set's name.
allcells.wttmx.gsea.estrogen <- Cluster_GSEA(wt.cells.tmx.response,
                                             geneset = e2response["GO_RESPONSE_TO_ESTROGEN"],
                                             sig.snapshot = FALSE)

p1 <- cluster_GSEA_tree_plot(allcells.wttmx.gsea.estrogen)

save_ploty("~/Box/Tamoxifen paper/estrogenresponse.png", plot = p1, base_asp = 1.1, base_height = 2.75)

allcells.muttmx.gsea.estrogen <- Cluster_GSEA(mut.cells.tmx.response,
                                              geneset = e2response["GO_RESPONSE_TO_ESTROGEN"],
                                              sig.snapshot = FALSE)

cluster_2test_GSEA_tree(allcells.wttmx.gsea.estrogen, allcells.muttmx.gsea.estrogen)

# Same comparison for the full neuroplus collection.
allcells.wttmx.gsea.neuroplus <- Cluster_GSEA(wt.cells.tmx.response, geneset = neuroplus, sig.snapshot = FALSE)
allcells.muttmx.gsea.neuroplus <- Cluster_GSEA(mut.cells.tmx.response, geneset = neuroplus, sig.snapshot = FALSE)

cluster_2test_GSEA_tree(allcells.wttmx.gsea.neuroplus, allcells.muttmx.gsea.neuroplus)

#format leading edge as comma list for exporting
# vapply() returns a plain character column (one comma-separated string per
# row). The original lapply() left `leadingEdge` as a list column, which
# write_xlsx() does not export as text.
allcells.wttmx.gsea.neuroplus$leadingEdge <- vapply(
  allcells.wttmx.gsea.neuroplus$leadingEdge,
  FUN = paste, FUN.VALUE = character(1), collapse = ","
)
allcells.muttmx.gsea.neuroplus$leadingEdge <- vapply(
  allcells.muttmx.gsea.neuroplus$leadingEdge,
  FUN = paste, FUN.VALUE = character(1), collapse = ","
)

allcells.wttmx.gsea.neuroplus %>%
  writexl::write_xlsx("~/Box/Tamoxifen paper/allcellsGSEAneuroplus.xlsx")
allcells.muttmx.gsea.neuroplus %>%
  writexl::write_xlsx("~/Box/Tamoxifen paper/allcellsGSEAneuroplusmuttemp.xlsx")

# Neuron volcano plots with one gene set highlighted each, wild-type (odd
# numbers) and mutant (even numbers) tamoxifen response side by side:
# oxidative phosphorylation (p1/p2), glycolysis (p3/p4) and neuropeptide
# signaling (p5/p6).
p1 <- Volcano_Plot_GS(
  wt.cells.tmx.response$Neuron,
  label_features = hallmarks$HALLMARK_OXIDATIVE_PHOSPHORYLATION,
  feature_label = FALSE, logFCcollapse = 0.5, pval_collapse = 1e-30,
  pt_size = 0.5, gs_size = 0.5
)
p2 <- Volcano_Plot_GS(
  mut.cells.tmx.response$Neuron,
  label_features = hallmarks$HALLMARK_OXIDATIVE_PHOSPHORYLATION,
  feature_label = FALSE, logFCcollapse = 0.5, pval_collapse = 1e-30,
  pt_size = 0.5, gs_size = 0.5
)

p3 <- Volcano_Plot_GS(
  wt.cells.tmx.response$Neuron,
  label_features = hallmarks$HALLMARK_GLYCOLYSIS,
  feature_label = FALSE, logFCcollapse = 0.5, pval_collapse = 1e-30,
  pt_size = 0.5, gs_size = 0.5
)
p4 <- Volcano_Plot_GS(
  mut.cells.tmx.response$Neuron,
  label_features = hallmarks$HALLMARK_GLYCOLYSIS,
  feature_label = FALSE, logFCcollapse = 0.5, pval_collapse = 1e-30,
  pt_size = 0.5, gs_size = 0.5
)

p5 <- Volcano_Plot_GS(
  wt.cells.tmx.response$Neuron,
  label_features = neuroplus$GO_NEUROPEPTIDE_SIGNALING_PATHWAY,
  feature_label = FALSE, logFCcollapse = 0.5, pval_collapse = 1e-30,
  pt_size = 0.5, gs_size = 0.5
)
p6 <- Volcano_Plot_GS(
  mut.cells.tmx.response$Neuron,
  label_features = neuroplus$GO_NEUROPEPTIDE_SIGNALING_PATHWAY,
  feature_label = FALSE, logFCcollapse = 0.5, pval_collapse = 1e-30,
  pt_size = 0.5, gs_size = 0.5
)

save_ploty("~/Box/Tamoxifen paper/volcanoneurooxphos.png", plot = p1, base_height = 2, base_asp = 1)
save_ploty("~/Box/Tamoxifen paper/mutvolcanoneurooxphos.png", plot = p2, base_height = 2, base_asp = 1)
save_ploty("~/Box/Tamoxifen paper/volcanoneuroglyco.png", plot = p3, base_height = 2, base_asp = 1)
save_ploty("~/Box/Tamoxifen paper/mutvolcanoneuroglyco.png", plot = p4, base_height = 2, base_asp = 1)
save_ploty("~/Box/Tamoxifen paper/volcanoneuronpep.png", plot = p5, base_height = 2, base_asp = 1)
save_ploty("~/Box/Tamoxifen paper/mutvolcanoneuronpep.png", plot = p6, base_height = 2, base_asp = 1)


#here find individual genes to label on these volcano plots
# Gene symbols are upper-cased so they can be matched against the gene sets.
temp <- wt.cells.tmx.response[["Neuron"]]
temp[["gene"]] <- toupper(temp[["gene"]])

# Neuropeptide-signaling genes, ordered by log fold change.
arrange(
  filter(temp, gene %in% neuroplus[["GO_NEUROPEPTIDE_SIGNALING_PATHWAY"]]),
  avg_logFC
)

# Glycolysis genes with adjusted p-value <= .05.
filter(temp, gene %in% neuroplus[["HALLMARK_GLYCOLYSIS"]], p_val_adj <= .05)

# Oxidative-phosphorylation genes with adjusted p-value <= .05.
filter(temp, gene %in% neuroplus[["HALLMARK_OXIDATIVE_PHOSPHORYLATION"]], p_val_adj <= .05)


#collapsed volcano plots
# One panel per contrast: wild-type (p1) and mutant (p2) tamoxifen response.
p1 <- BellagioPlot(
  wt.cells.tmx.response,
  logFCcollapse = 0.8, pt.size = 0.1, pt.alpha = 0.5, xlab.italic = FALSE
)
p2 <- BellagioPlot(
  mut.cells.tmx.response,
  logFCcollapse = 0.8, pt.size = 0.1, pt.alpha = 0.5, xlab.italic = FALSE
)

# Up/down DEG counts behind the two plots.
DEGs_UpDown_Plot(wt.cells.tmx.response, return_table = TRUE)
DEGs_UpDown_Plot(mut.cells.tmx.response, return_table = TRUE)

save_ploty("~/Box/Tamoxifen paper/v20_allcells_bellagio.png", plot = p1, base_asp = 1.75, base_height = 2.5)
save_ploty("~/Box/Tamoxifen paper/v20_allmutcells_bellagio.png", plot = p2, base_asp = 1.6, base_height = 2.5)

#show expression of esr1 in WT vs KO cell types
# Esr1 is highlighted in the mutant-vehicle vs WT-vehicle contrast.
p1 <- BellagioGeneSet(
  wt.cells.mut.response,
  features = c("Esr1"), padj.cutoff = 1, logFCcollapse = 0.5,
  label.alpha = 0, label.nudge = 0, cluster.label.ital = FALSE,
  pt.size = 0.25, gs.pt.size = 0.5
)
save_ploty("~/Box/Tamoxifen paper/v20_allcells_bellagio_esr1.png", plot = p1, base_asp = 1.75, base_height = 2.5)

#highlight potentially interesting genes as per reviewer request

# Mutant (first argument) against wild-type (second argument) tamoxifen
# response for neurons and ependymal cells, showing the "y_sig" genes.
p1 <- DEG_similarity_plot(
  mut.cells.tmx.response, wt.cells.tmx.response,
  cluster_name = "Neuron", data_shown = "y_sig",
  logFCcollapse = 0.5, pt.alpha = 0.5, label_logFC = 0.2
) + NoLegend()

p2 <- DEG_similarity_plot(
  mut.cells.tmx.response, wt.cells.tmx.response,
  cluster_name = "Ependymal", data_shown = "y_sig",
  logFCcollapse = 1, pt.alpha = 0.5, label_logFC = 0.4
) + NoLegend()

plot_grid(p2)

DEG_similarity_table(mut.cells.tmx.response, wt.cells.tmx.response, data_shown = "one_sig")

# Gene lists behind the two plots, written to csv (wild-type given first here).
DEG_similarity_genes(
  wt.cells.tmx.response, mut.cells.tmx.response,
  cluster_name = "Neuron", data_shown = "x_sig",
  filename = "~/Box/Tamoxifen paper/v20_DEG_sim_genes_neurons.csv"
)

DEG_similarity_genes(
  wt.cells.tmx.response, mut.cells.tmx.response,
  cluster_name = "Ependymal", data_shown = "x_sig",
  filename = "~/Box/Tamoxifen paper/v20_DEG_sim_genes_ependymal.csv"
)

DEG_similarity_table(wt.cells.tmx.response, mut.cells.tmx.response, data_shown = "one_sig")

save_ploty("~/Box/Tamoxifen paper/v20_DEG_sim_genes_neurons.png", plot = p1, base_asp = 1, base_height = 4)
save_ploty("~/Box/Tamoxifen paper/v20_DEG_sim_genes_ependymal.png", plot = p2, base_asp = 1, base_height = 4)

# Neuron DEG-similarity plots (mutant vs wild-type tamoxifen response) with a
# gene set, or a single gene, highlighted.

# Neuropeptide signaling, all genes shown.
DEG_similarity_plot_GS(
  mut.cells.tmx.response, wt.cells.tmx.response,
  features = neuroplus$GO_NEUROPEPTIDE_SIGNALING_PATHWAY,
  cluster_name = "Neuron", data_shown = "all",
  logFCcollapse = 0.5, pt.alpha = 0.5
) + NoLegend()

# Oxidative phosphorylation, all genes shown, without feature labels.
DEG_similarity_plot_GS(
  mut.cells.tmx.response, wt.cells.tmx.response,
  features = hallmarks$HALLMARK_OXIDATIVE_PHOSPHORYLATION,
  cluster_name = "Neuron", data_shown = "all",
  logFCcollapse = 0.5, pt.alpha = 0.5, label_features = FALSE
) + NoLegend()

# Neuropeptide signaling restricted to "y_sig" genes, with feature labels.
DEG_similarity_plot_GS(
  mut.cells.tmx.response, wt.cells.tmx.response,
  features = neuroplus$GO_NEUROPEPTIDE_SIGNALING_PATHWAY,
  cluster_name = "Neuron", data_shown = "y_sig",
  logFCcollapse = 0.5, pt.alpha = 0.5, label_features = TRUE
) + NoLegend()

# Npy alone.
DEG_similarity_plot_GS(
  mut.cells.tmx.response, wt.cells.tmx.response,
  features = "Npy",
  cluster_name = "Neuron", data_shown = "all",
  logFCcollapse = 0.5, pt.alpha = 0.5
) + NoLegend()

# Gene-set similarity plots for the "Gad2" and "Tac2" clusters of the
# neuron-level DEG results.
# NOTE(review): mut.neurons.tmx.response, neurons.tmx.response and
# neuropeptidehormone are not assigned anywhere above this point in the
# script; presumably they are created further down or come from a saved
# workspace - confirm, otherwise these calls stop a top-to-bottom run.
DEG_similarity_plot_GS(mut.neurons.tmx.response, neurons.tmx.response, 
                       features = neuropeptidehormone$V1,
                       cluster_name = "Gad2", data_shown = "all", 
                       logFCcollapse = 2, pt.alpha = .5) + NoLegend()

# List the cluster names available in the neuron-level results.
names(neurons.tmx.response)
  DEG_similarity_plot_GS(mut.neurons.tmx.response, neurons.tmx.response, 
                         features = neuropeptidehormone$V1,
                         cluster_name = "Tac2", data_shown = "all", 
                         logFCcollapse = 2.5, pt.alpha = .5) + NoLegend()

#this looks amazing - however both data sets compare to the same wild-type vehicle sample. probably not fair.
DEG_similarity_plot(
  wt.cells.mut.response, wt.cells.tmx.response,
  cluster_name = "Neuron", data_shown = "one_sig",
  logFCcollapse = 0.5
) + NoLegend()

# Helper: wild-type vs mutant tamoxifen response for one cluster, showing the
# "x_sig" genes, without a legend.
wt_vs_mut_similarity <- function(cluster, collapse = 0.5) {
  DEG_similarity_plot(
    wt.cells.tmx.response, mut.cells.tmx.response,
    cluster_name = cluster, data_shown = "x_sig",
    logFCcollapse = collapse, pt.alpha = 0.5
  ) + NoLegend()
}

p1 <- wt_vs_mut_similarity("Neuron")
p3 <- wt_vs_mut_similarity("Ependymal", collapse = 1)

save_ploty("~/Box/Tamoxifen paper/v20DEGsim_wttmx_muttmx_neurons.png", plot = p1, base_height = 2.25, base_asp = .975)
save_ploty("~/Box/Tamoxifen paper/DEGsim_wttmx_muttmx_ependymal.png", plot = p3, base_height = 2.25, base_asp = .95)

p4 <- wt_vs_mut_similarity("Astro")
save_ploty("~/Box/Tamoxifen paper/v20DEGsim_wttmx_muttmx_astro.png", plot = p4, base_height = 2.25, base_asp = .975)

p5 <- wt_vs_mut_similarity("Oligo")
save_ploty("~/Box/Tamoxifen paper/v20DEGsim_wttmx_muttmx_oligo.png", plot = p5, base_height = 2.25, base_asp = .975)

p6 <- wt_vs_mut_similarity("Endo")
save_ploty("~/Box/Tamoxifen paper/v20DEGsim_wttmx_muttmx_endo.png", plot = p6, base_height = 2.25, base_asp = .975)

p7 <- wt_vs_mut_similarity("Microglia")
save_ploty("~/Box/Tamoxifen paper/v20DEGsim_wttmx_muttmx_micro.png", plot = p7, base_height = 2.25, base_asp = .975)

# p3 is reused here for the polydendrocyte plot (the ependymal one is already saved).
p3 <- wt_vs_mut_similarity("Polydendro")

# NOTE(review): the clusters were renamed to cell-type labels earlier, so the
# numeric cluster name "6" looks like a leftover - confirm it still exists in
# the DEG lists.
DEG_similarity_plot(
  wt.cells.tmx.response, mut.cells.tmx.response,
  cluster_name = "6", data_shown = "y_sig", logFCcollapse = 100,
  pt.size = 0.75, pt.alpha = 0.5
) + NoLegend() + ylim(-5, 5) + xlim(-5, 5)

save_ploty("~/Box/Tamoxifen paper/DEGsim_wttmx_muttmx_ployd.png", plot = p3, base_height = 2.25, base_asp = 1)

# Similarity tables for both pairs of contrasts, written to csv.
write_csv(
  DEG_similarity_table(wt.cells.tmx.response, mut.cells.tmx.response, data_shown = "one_sig"),
  "~/Box/Tamoxifen paper/DEG_sim_tables.csv"
)
write_csv(
  DEG_similarity_table(wt.cells.mut.response, wt.cells.tmx.response, data_shown = "one_sig"),
  "~/Box/Tamoxifen paper/DEG_sim_tables2.csv"
)

DEG_similarity_table(wt.cells.tmx.response, mut.cells.tmx.response, data_shown = "x_sig")

# cluster subtypes here ####

# One object per major cell type for sub-clustering. The argument is spelled
# out as `idents`; the original `ident =` only worked through R's partial
# argument matching.
all.neurons <- subset(all.cells.integrated, idents = "Neuron")
all.astrocytes <- subset(all.cells.integrated, idents = "Astro")
all.ependymal <- subset(all.cells.integrated, idents = "Ependymal")
all.oligodendrocytes <- subset(all.cells.integrated, idents = "Oligo")

#neurons ####

# QC violin plots on the RNA assay of the neuron subset.
DefaultAssay(object = all.neurons) <- "RNA"
VlnPlot(
  object = all.neurons,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
  pt.size = 0
)
dev.off()

# Prepare one object per condition. Each is subset by `cond`, gets percent.mt
# recomputed, has the old "integrated" assay dropped, and has 500 variable
# features selected. The conditions are written out one by one because the
# `subset =` expression is evaluated non-standardly by Seurat.

# WT + vehicle
wtv.n <- subset(all.neurons, subset = cond == "WtVehicle")
wtv.n <- PercentageFeatureSet(
  object = wtv.n, pattern = "^mt-", col.name = "percent.mt"
)
wtv.n[["integrated"]] <- NULL
wtv.n <- FindVariableFeatures(
  wtv.n, selection.method = "vst", nfeatures = 500, verbose = FALSE
)

# WT + tamoxifen
wtt.n <- subset(all.neurons, subset = cond == "WtTamoxifen")
wtt.n <- PercentageFeatureSet(
  object = wtt.n, pattern = "^mt-", col.name = "percent.mt"
)
wtt.n[["integrated"]] <- NULL
wtt.n <- FindVariableFeatures(
  wtt.n, selection.method = "vst", nfeatures = 500, verbose = FALSE
)

# Mutant + vehicle
mutv.n <- subset(all.neurons, subset = cond == "MutVehicle")
mutv.n <- PercentageFeatureSet(
  object = mutv.n, pattern = "^mt-", col.name = "percent.mt"
)
mutv.n[["integrated"]] <- NULL
mutv.n <- FindVariableFeatures(
  mutv.n, selection.method = "vst", nfeatures = 500, verbose = FALSE
)

# Mutant + tamoxifen
mutt.n <- subset(all.neurons, subset = cond == "MutTamoxifen")
mutt.n <- PercentageFeatureSet(
  object = mutt.n, pattern = "^mt-", col.name = "percent.mt"
)
mutt.n[["integrated"]] <- NULL
mutt.n <- FindVariableFeatures(
  mutt.n, selection.method = "vst", nfeatures = 500, verbose = FALSE
)

# Visual check of the selected variable features, one plot per condition.
VariableFeaturePlot(wtv.n)
VariableFeaturePlot(wtt.n)
VariableFeaturePlot(mutv.n)
VariableFeaturePlot(mutt.n)

# Unnamed list, in the order WT-veh, WT-tmx, Mut-veh, Mut-tmx.
neurons.list <- list(wtv.n, wtt.n, mutv.n, mutt.n)

# Raise the size limit for globals exported by the future framework (bytes).
options(future.globals.maxSize = 3670016000)

neuron.anchors <- FindIntegrationAnchors(
  object.list = neurons.list,
  dims = 1:30
)
neurons.integrated <- IntegrateData(anchorset = neuron.anchors, dims = 1:30)

# Free the intermediates; only neurons.integrated is used from here on.
rm(wtv.n, wtt.n, mutv.n, mutt.n, neuron.anchors, neurons.list)

# Switch to the integrated assay and print cell counts per metadata field.
DefaultAssay(neurons.integrated) <- "integrated"
table(neurons.integrated@meta.data$orig.ident)
table(neurons.integrated@meta.data$geno)
table(neurons.integrated@meta.data$cond)
#scale neuron data and perform clustering####
neurons.integrated <- ScaleData(neurons.integrated, verbose = FALSE)
neurons.integrated <- RunPCA(
  object = neurons.integrated,
  npcs = 30,
  verbose = FALSE
)
ElbowPlot(neurons.integrated)

# Number of principal components used for neighbours, UMAP and t-SNE below.
j <- 20
neurons.integrated <- FindNeighbors(object = neurons.integrated, dims = 1:j)
neurons.integrated <- FindClusters(
  object = neurons.integrated,
  resolution = 0.7
)
table(neurons.integrated@active.ident)

# UMAP and t-SNE embeddings ####
neurons.integrated <- RunUMAP(
  object = neurons.integrated,
  reduction = "pca",
  dims = 1:j
)
DimPlot(
  object = neurons.integrated,
  reduction = "umap",
  label = TRUE,
  pt.size = 0.01
) +
  NoLegend()

neurons.integrated <- RunTSNE(
  object = neurons.integrated,
  reduction = "pca",
  dims = 1:j
)
DimPlot(
  object = neurons.integrated,
  reduction = "tsne",
  label = FALSE,
  pt.size = 0.01
)

# Cluster markers; the top four per cluster are kept for reference.
neuron.markers <- FindAllMarkers(neurons.integrated, logfc.threshold = 0.5)
neuron.markers.top <- top_markers(neuron.markers, n_markers = 4)

# Older versions of rename_clusters can break the object; make sure the
# current version is loaded before running this line.
neurons.integrated <- rename_clusters(
  neurons.integrated,
  marker.list = neuron.markers
)
table(neurons.integrated@active.ident)

# Cell names for the wild-type subsets, computed once and reused below.
# WT vehicle cells come first, then WT tamoxifen cells.
wt.vehicle.cells <- WhichCells(
  object = subset(x = neurons.integrated, subset = cond == "WtVehicle")
)
wt.tamoxifen.cells <- WhichCells(
  object = subset(x = neurons.integrated, subset = cond == "WtTamoxifen")
)
wt.veh.tmx.cells <- c(wt.vehicle.cells, wt.tamoxifen.cells)
wild.type.cells <- c(
  WhichCells(
    object = subset(x = neurons.integrated, subset = geno == "Wild-Type")
  )
)

# WT vehicle + tamoxifen cells, coloured by condition.
p1 <- DimPlot(
  object = neurons.integrated,
  reduction = "umap",
  label = FALSE,
  group.by = "cond",
  pt.size = 0.01,
  cells = wt.veh.tmx.cells
)

# Same cells, coloured by cluster, with repelled labels and the legend kept.
p6 <- DimPlot(
  object = neurons.integrated,
  reduction = "umap",
  label = TRUE,
  pt.size = 0.01,
  cells = wt.veh.tmx.cells,
  repel = TRUE
)

# All cells with geno == "Wild-Type", labelled clusters, no legend.
p5 <- DimPlot(
  object = neurons.integrated,
  reduction = "umap",
  label = TRUE,
  pt.size = 0.01,
  cells = wild.type.cells
) +
  NoLegend()

# Every neuron, unlabelled.
p2 <- DimPlot(
  object = neurons.integrated,
  reduction = "umap",
  label = FALSE,
  pt.size = 0.01
) +
  NoLegend()

save_ploty(
  "~/Box/Tamoxifen paper/v20umapneuronswtvtmx.png",
  plot = p1, base_asp = 1.5, base_height = 3
)
save_ploty(
  "~/Box/Tamoxifen paper/v20umapneuronsWTcells.png",
  plot = p5, base_asp = 1.1, base_height = 3.8
)
save_ploty(
  "~/Box/Tamoxifen paper/v20umapneuronslegend.png",
  plot = p6, base_asp = 1.2, base_height = 4
)
save_ploty(
  "~/Box/Tamoxifen paper/v20umapneuronsALLcells.png",
  plot = p2, base_asp = 1.2, base_height = 4
)

# All four conditions, using the cells returned by WhichCells with
# downsample = 2000.
p7 <- DimPlot(
  object = neurons.integrated,
  reduction = "umap",
  label = FALSE,
  group.by = "cond",
  pt.size = 0.01,
  cells = WhichCells(object = neurons.integrated, downsample = 2000)
)
save_ploty(
  "~/Box/Tamoxifen paper/v20umapneurons4groups.png",
  plot = p7, base_asp = 1.4, base_height = 3.5
)




# Print the current cluster names.
levels(neurons.integrated@active.ident)

# Hormone-receptor genes plus the cluster names themselves used as features,
# restricted to geno == "Wild-Type"; the y axis is in reversed cluster order.
p1 <- DotPlot(
  subset(x = neurons.integrated, subset = geno == "Wild-Type"),
  features = rev(
    c("Esr1", "Esr2", "Gper1", "Pgr", levels(neurons.integrated@active.ident))
  )
) +
  theme(
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = .5, face = "italic"
    )
  ) +
  scale_y_discrete(limits = rev(levels(neurons.integrated@active.ident)))

# top_markers() can return duplicated genes, which breaks DotPlot; using
# levels(ident) as above avoids that.
DotPlot(
  neurons.integrated,
  features = c(
    "Esr1", "Pgr", "Gper1", "Esr2",
    rev(top_markers(neuron.markers)$genesig)
  )
) +
  theme(
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = .5, face = "italic"
    )
  )

# The second assignment wins: the plot below is drawn from the RNA assay.
DefaultAssay(neurons.integrated) <- "integrated"
DefaultAssay(neurons.integrated) <- "RNA"

# Receptor panel in WT vehicle cells only; this is the plot that gets saved.
p1 <- DotPlot(
  subset(x = neurons.integrated, subset = cond == "WtVehicle"),
  features = c(
    "Esr2", "Esr1", "Lepr", "Cckar", "Cckbr",
    "Kiss1r", "Il6ra", "Pdgfra", "Ghsr"
  )
) +
  theme(
    axis.text.x = element_text(
      angle = 90, hjust = 1, vjust = .5, face = "italic"
    )
  )

save_ploty(
  "~/Box/Tamoxifen paper/v20neuronmarkersDPnew.png",
  plot = p1,
  base_height = 5,
  base_asp = 1.3
)









#neuron cluster DEGs####
# Print the current default assay, then make sure DE testing runs on RNA.
DefaultAssay(neurons.integrated)
DefaultAssay(neurons.integrated) <- "RNA"

# Test Esr1 (and related receptor) enrichment per cluster.
# This should not include KOs, hence the geno == "Wild-Type" subset.
esr.marker.test.neurons <- FindAllMarkers(
  subset(x = neurons.integrated, subset = geno == "Wild-Type"),
  features = c("Esr1", "Esr2", "Gper1", "Nkx2-1", "Pgr"),
  min.pct = 0,
  logfc.threshold = 0,
  only.pos = TRUE
)

# Same test across all cells, regardless of genotype.
esr.marker.test.neurons.2 <- FindAllMarkers(
  neurons.integrated,
  features = c("Esr1", "Esr2", "Gper1", "Nkx2-1", "Pgr"),
  min.pct = 0,
  logfc.threshold = 0,
  only.pos = TRUE
)

# Per-cluster differential expression between pairs of `cond` levels.
# Mutant vehicle vs WT vehicle:
neurons.mut.response <- cluster_DEGs(
  neurons.integrated,
  condition_1 = "MutVehicle",
  condition_2 = "WtVehicle",
  meta_slot = "cond",
  logfc.threshold = .01,
  min.pct = .01
)
# WT tamoxifen vs WT vehicle:
neurons.tmx.response <- cluster_DEGs(
  neurons.integrated,
  condition_1 = "WtTamoxifen",
  condition_2 = "WtVehicle",
  meta_slot = "cond",
  logfc.threshold = .01,
  min.pct = .01
)
# Mutant tamoxifen vs mutant vehicle:
mut.neurons.tmx.response <- cluster_DEGs(
  neurons.integrated,
  condition_1 = "MutTamoxifen",
  condition_2 = "MutVehicle",
  meta_slot = "cond",
  logfc.threshold = .01,
  min.pct = .01
)


# Esr1 / Pgr Bellagio plot for the mutant-vs-WT response.
# The result is now captured in p1 before saving. Previously it was only
# printed, so the save_plot() call below wrote whatever p1 held from an earlier
# section instead of this plot.
# NOTE(review): assumes BellagioGeneSet() returns a plot object that
# save_plot() accepts; confirm against ratplots.
p1 <- BellagioGeneSet(neurons.mut.response, features = c("Esr1", "Pgr"),
                      logFCcollapse = 2, label.nudge = 0.1)
p1  # still display the plot, as the original top-level call did

save_plot("~/Desktop/bellagioneuronsesrs.png", plot = p1)

# Up/down DEG summaries per comparison. p1 and p2 are overwritten just below
# without being displayed or saved.
p1 <- DEGs_UpDown_Plot(neurons.tmx.response)
p2 <- DEGs_UpDown_Plot(neurons.mut.response)
p3 <- DEGs_UpDown_Plot(mut.neurons.tmx.response)


# Side-by-side similarity plots for cluster "8".
p1 <- DEG_similarity_plot(neurons.tmx.response, neurons.mut.response, cluster_name = "8", logFCcollapse = 5)
p2 <- DEG_similarity_plot(neurons.tmx.response, mut.neurons.tmx.response, cluster_name = "8", logFCcollapse = 5)
plot_grid(p1, p2)


# Bellagio plot of the WT tamoxifen response (saved) and of the mutant
# tamoxifen response (displayed only).
p1 <- BellagioPlot(neurons.tmx.response, pt.size = .25, xlab.italic = FALSE)
BellagioPlot(mut.neurons.tmx.response)

save_ploty("~/Box/Tamoxifen paper/v20bellagioneurons.png", plot = p1, base_height = 3)



# neuron GSEAs ####

# Per-cluster GSEA against the GO_RESPONSE_TO_ESTROGEN gene set, for the
# mutant and the WT tamoxifen responses.
mut.neurons.tmx.gsea.estrogen <- Cluster_GSEA(
  mut.neurons.tmx.response,
  geneset = list(e2response$GO_RESPONSE_TO_ESTROGEN),
  sig.snapshot = FALSE
)
wt.neurons.tmx.gsea.estrogen <- Cluster_GSEA(
  neurons.tmx.response,
  geneset = list(e2response$GO_RESPONSE_TO_ESTROGEN),
  sig.snapshot = FALSE
)

cluster_GSEA_tree_plot(wt.neurons.tmx.gsea.estrogen, FDR = .05) +
  NoLegend()
cluster_GSEA_tree_plot(mut.neurons.tmx.gsea.estrogen, FDR = .05) +
  NoLegend()

# Fix the cluster display order for the bar chart below.
# NOTE(review): wt.neurons.tmx.gsea.neuroplus is not created in this part of
# the script; it must already exist in the session.
wt.neurons.tmx.gsea.neuroplus$cluster <- factor(
  wt.neurons.tmx.gsea.neuroplus$cluster,
  levels = c(
    "Tac2", "Oxt", "Pomc", "Crym", "Foxb1", "Gad2.1", "Hdc",
    "Lingo2", "Ppp1r17", "Cdk8", "Gad2", "Gal", "Plp1", "Sst"
  ),
  ordered = TRUE
)

# One bar per pathway-by-cluster combination, with height equal to NES.
p1 <- ggplot(
  data = wt.neurons.tmx.gsea.neuroplus,
  mapping = aes(x = interaction(pathway, cluster), y = NES)
) +
  geom_col() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))

save_ploty(
  "~/Box/Tamoxifen paper/neuronGSEAs_bars.png",
  plot = p1,
  base_height = 6,
  base_asp = .8
)





# Bar chart of per-cluster GSEA enrichment scores, sorted by NES.
#
# Arguments:
#   cGSEA_out   GSEA results; passed through bind_rows(), after which the
#               columns `cluster`, `pathway`, `NES` and `padj` are used.
#   down_first  TRUE (default) orders bars by ascending NES, FALSE by
#               descending NES.
#   GS_pathway  Optional pathway name; when given, only rows whose `pathway`
#               equals it are plotted.
#   FDR         Adjusted-p cutoff used to colour bars (padj < FDR is black,
#               otherwise gray).
#
# Returns a ggplot object. Warns when several pathways are present and
# GS_pathway was not supplied.
cluster_GSEA_tree_plot <- function(cGSEA_out, down_first = TRUE, GS_pathway = NULL, FDR = .05) {
  sig_label <- paste0("FDR < ", FDR)
  ns_label <- paste0("FDR > ", FDR)

  cGSEA_out <- cGSEA_out %>%
    bind_rows() %>%
    mutate(sig = ifelse(padj < FDR, sig_label, ns_label))

  if (!is.null(GS_pathway)) {
    cGSEA_out <- cGSEA_out %>% filter(pathway == GS_pathway)
  }
  # Scalar condition, so use the short-circuiting `&&`.
  if (is.null(GS_pathway) && length(unique(cGSEA_out$pathway)) > 1) {
    warning("More than one enrichment pathway. Designate GS_pathway")
    GS_pathway <- "Multiple"
  }

  # Colours are bound to labels by name. With an unnamed vector ggplot2 hands
  # out colours in level order, so a plot with no significant cluster would
  # draw its "FDR > x" bars in black.
  fill_values <- setNames(c("Black", "Gray"), c(sig_label, ns_label))

  # Only the sort direction differs between the two layouts. The aes()
  # expressions are kept verbatim so the axis title does not change.
  bar_mapping <- if (!down_first) {
    aes(x = reorder(cluster, -NES), y = NES, fill = sig)
  } else {
    aes(x = reorder(cluster, NES), y = NES, fill = sig)
  }

  ggplot(data = cGSEA_out, mapping = bar_mapping) +
    geom_bar(stat = "identity") +
    theme_classic() +
    scale_fill_manual(values = fill_values) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
    ggtitle(paste0("Pathway: ", GS_pathway))
}


# Dot plot comparing per-cluster GSEA results from two conditions.
#
# Arguments:
#   GSEA_out_1, GSEA_out_2  GSEA results; each is passed through bind_rows(),
#                           after which `cluster`, `NES` and `padj` are used.
#   GSEA_cond_1, GSEA_cond_2  Legend labels for the two result sets.
#   color_padj              Points with padj >= color_padj are relabelled "NS".
#
# Clusters are ordered on the x axis by ascending padj in the first result
# set; clusters that occur only in the second set are appended after them.
# Colours are fixed: first condition red, second condition blue, "NS" gray.
# Returns a ggplot object.
cluster_2test_GSEA_tree <- function(GSEA_out_1, GSEA_out_2, GSEA_cond_1 = "GSEA_out_1",
                                    GSEA_cond_2 = "GSEA_out_2", color_padj = .05) {
  a <- bind_rows(GSEA_out_1)
  b <- bind_rows(GSEA_out_2)
  a$col <- GSEA_cond_1
  b$col <- GSEA_cond_2
  ab <- bind_rows(a, b)

  # unique() guards against repeated clusters (e.g. several pathways per
  # cluster), which would make factor() fail on duplicated levels. Appending
  # b's clusters keeps them from turning into NA on the x axis.
  a_sorted <- arrange(a, padj)
  levs <- unique(c(as.character(a_sorted$cluster), as.character(b$cluster)))

  ab <- ab %>%
    mutate(cluster = factor(cluster, levels = levs)) %>%
    mutate(col = ifelse(padj < color_padj, col, "NS"))

  # Named colours. An unnamed vector is assigned in alphabetical order of the
  # labels actually present, which mis-colours points when one condition has
  # no significant hit or when custom labels sort after "NS".
  point_colors <- setNames(
    c("Red", "Blue", "Gray"),
    c(GSEA_cond_1, GSEA_cond_2, "NS")
  )

  ggplot(data = ab) +
    geom_point(aes(x = cluster, y = NES, size = -log10(padj), color = col, alpha = -log10(padj))) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5)) +
    scale_size_area(name = "Adj. P", breaks = c(1, 1.3, 3), labels = c(".1", ".05", ".01")) +
    scale_color_manual(values = point_colors) +
    guides(alpha = "none")  # `alpha = FALSE` is deprecated in current ggplot2
}

# Compare the WT and mutant estrogen-response GSEA results per cluster.
# Points with padj >= .25 are labelled "NS".
p1 <- cluster_2test_GSEA_tree(
  wt.neurons.tmx.gsea.estrogen,
  mut.neurons.tmx.gsea.estrogen,
  color_padj = .25
)

save_ploty(
  "~/Box/Tamoxifen paper/v20estrogenGSEAneurons.png",
  plot = p1,
  base_height = 2.65,
  base_asp = 1.8
)


#cluster proportions for stat testing in Prism####

# Sample-to-group lookup tables, keyed by group name. Sample IDs follow the
# pattern <genotype><treatment><replicate>, e.g. "wtt3" or "mutv2".

# Grouping by genotype.
glist <- list(
  "mutant" = c(paste0("mutt", 1:4), paste0("mutv", 1:4)),
  "wild-type" = c(paste0("wtt", 1:5), paste0("wtv", 1:3))
)

# Grouping by treatment.
glist2 <- list(
  "tamoxifen" = c(paste0("mutt", 1:4), paste0("wtt", 1:5)),
  "vehicle" = c(paste0("mutv", 1:4), paste0("wtv", 1:3))
)


# Per-sample cluster proportions in long format.
#
# Arguments:
#   object          An S4 object with an `active.ident` slot (cluster label per
#                   cell) and a `meta.data` slot holding an `orig.ident` column
#                   (sample label per cell).
#   grouping_list   Optional named list; each element is a vector of sample
#                   names and the element name is the group label. Adds a
#                   `group1` factor column.
#   grouping_list2  Optional second grouping of the same form. Adds `group2`.
#
# Returns a data.frame with one row per (active.ident, orig.ident) pair and a
# `Freq` column holding the fraction of that sample's cells in that cluster,
# plus `group1` / `group2` when the corresponding list is supplied. Samples
# that belong to no group get NA; a sample listed under two different groups
# of the same list is an error.
cluster_Proportions2 <- function(object, grouping_list = NULL, grouping_list2 = NULL) {
  # Map sample names to group labels by exact match. The previous grep()-based
  # lookup matched substrings of the deparsed vectors, so "wtt1" also hit a
  # group containing "wtt10".
  group_of <- function(samples, groups) {
    if (is.null(names(groups))) {
      stop("grouping lists must be named lists of sample names", call. = FALSE)
    }
    lookup <- unique(data.frame(
      sample = as.character(unlist(groups, use.names = FALSE)),
      group = rep(names(groups), lengths(groups)),
      stringsAsFactors = FALSE
    ))
    if (anyDuplicated(lookup$sample) > 0) {
      stop("a sample is assigned to more than one group: ",
           paste(unique(lookup$sample[duplicated(lookup$sample)]), collapse = ", "),
           call. = FALSE)
    }
    factor(lookup$group[match(as.character(samples), lookup$sample)])
  }

  # Cluster-by-sample contingency table, scaled so each sample column sums to 1.
  counts <- table(
    active.ident = object@active.ident,
    orig.ident = object@meta.data$orig.ident
  )
  tib <- as.data.frame(prop.table(counts, margin = 2), stringsAsFactors = TRUE)

  # Each grouping is optional on its own. Previously group2 was computed
  # whenever grouping_list was given, which failed if grouping_list2 was left
  # at its NULL default.
  if (!is.null(grouping_list)) {
    tib$group1 <- group_of(tib$orig.ident, grouping_list)
  }
  if (!is.null(grouping_list2)) {
    tib$group2 <- group_of(tib$orig.ident, grouping_list2)
  }
  tib
}


# Cluster proportions per sample for all cells and for neurons only.
props2 <- cluster_Proportions2(all.cells.integrated, glist, glist2)
props3 <- cluster_Proportions2(neurons.integrated, glist, glist2)

# Export the all-cell proportions in wide format (one column per cluster).
# An earlier long-format write.csv() to this same path was removed: the write
# below overwrote it immediately, so it never survived on disk.
props2 %>%
  pivot_wider(names_from = active.ident, values_from = Freq) %>%
  write.csv("~/Desktop/props.csv")



